Creating new data

The contents will appear later

This section will contain hands-on material that will appear later.

import geopandas as gpd
import osmnx
import rioxarray
import tobler
import xarray as xr
import xvec

from libpysal import graph
from sklearn import neighbors

Map matching

Nearest join

simd = gpd.read_file("data/edinburgh_simd_2020.gpkg")
simd.geometry.explore()
Make this Notebook Trusted to load map: File -> Trust Notebook
extent = simd.to_crs(4326).unary_union
pois = osmnx.features_from_polygon(extent, tags={"amenity": ["restaurant", "bar"]})
pois.head(2)
addr:city addr:housenumber addr:postcode addr:street amenity cuisine name phone source wheelchair ... contact:tripadvisor level:ref operator:wikidata alt_name:zh nodes building building:levels building:material roof:levels roof:shape
element_type osmid
node 21042679 Edinburgh 41 EH8 9NZ South Clerk Street restaurant chinese Happy Hot Pot +44 131 667 6337 survey no ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
29173148 Edinburgh NaN EH4 6DY Queensferry Road restaurant steak Miller & Carter +44 131 339 4350 survey NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

2 rows × 144 columns

pois = pois.reset_index()[
    [
        "osmid",
        "amenity",
        "name",
        "geometry",
    ]
].to_crs(simd.crs)
pois.head(2)
osmid amenity name geometry
0 21042679 restaurant Happy Hot Pot POINT (326363.790 672581.659)
1 29173148 restaurant Miller & Carter POINT (317864.915 675479.475)
pois.explore("amenity", tiles="CartoDB Positron")
/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/explore.py:400: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  elif pd.api.types.is_categorical_dtype(gdf[column]):
Make this Notebook Trusted to load map: File -> Trust Notebook
airbnb = gpd.read_file("data/edinburgh_airbnb_2023.gpkg")
airbnb.price.head()
0    $126.00
1    $269.00
2     $95.00
3    $172.00
4    $361.00
Name: price, dtype: object
airbnb["price_float"] = airbnb.price.str.strip("$").str.replace(",", "").astype(float)
two_bed_homes = airbnb[
    (airbnb["bedrooms"] == 2)
    & (airbnb["property_type"] == "Entire rental unit")
    & (airbnb["price_float"] < 300)
].copy()
two_bed_homes.head()
id bedrooms property_type price geometry price_float
3 821573 2.0 Entire rental unit $172.00 POINT (326748.646 674001.683) 172.0
5 834777 2.0 Entire rental unit $264.00 POINT (324950.724 673875.598) 264.0
6 450745 2.0 Entire rental unit $177.00 POINT (326493.725 672853.904) 177.0
10 485856 2.0 Entire rental unit $157.00 POINT (326597.124 673869.551) 157.0
17 51505 2.0 Entire rental unit $155.00 POINT (325393.807 674177.409) 155.0
airbnb_pubs = two_bed_homes.sjoin_nearest(pois, how="left", max_distance=100)
airbnb_pubs.head()
id bedrooms property_type price geometry price_float index_right osmid amenity name
3 821573 2.0 Entire rental unit $172.00 POINT (326748.646 674001.683) 172.0 NaN NaN NaN NaN
5 834777 2.0 Entire rental unit $264.00 POINT (324950.724 673875.598) 264.0 658.0 4.114822e+09 bar Badger & Co
6 450745 2.0 Entire rental unit $177.00 POINT (326493.725 672853.904) 177.0 32.0 5.043315e+08 restaurant Blonde
10 485856 2.0 Entire rental unit $157.00 POINT (326597.124 673869.551) 157.0 550.0 3.071953e+09 restaurant Oink Hog Roast
17 51505 2.0 Entire rental unit $155.00 POINT (325393.807 674177.409) 155.0 589.0 3.274256e+09 bar Bramble
airbnb_pubs_unlimited = two_bed_homes.sjoin_nearest(
    pois, how="left", max_distance=None, distance_col="distance"
)
airbnb_pubs_unlimited.head()
id bedrooms property_type price geometry price_float index_right osmid amenity name distance
3 821573 2.0 Entire rental unit $172.00 POINT (326748.646 674001.683) 172.0 692 4758881738 restaurant The Holyrood Room Restaurant 193.470980
5 834777 2.0 Entire rental unit $264.00 POINT (324950.724 673875.598) 264.0 658 4114821564 bar Badger & Co 13.631934
6 450745 2.0 Entire rental unit $177.00 POINT (326493.725 672853.904) 177.0 32 504331543 restaurant Blonde 88.364090
10 485856 2.0 Entire rental unit $157.00 POINT (326597.124 673869.551) 157.0 550 3071953161 restaurant Oink Hog Roast 24.399283
17 51505 2.0 Entire rental unit $155.00 POINT (325393.807 674177.409) 155.0 589 3274255605 bar Bramble 72.692519
airbnb_pubs_unlimited.explore("distance", tiles="CartoDB Positron", scheme="naturalbreaks")
/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/explore.py:400: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  elif pd.api.types.is_categorical_dtype(gdf[column]):
Make this Notebook Trusted to load map: File -> Trust Notebook

Count nearby features

pois_buffered = pois.set_geometry(pois.buffer(500))
pois_buffered.head()
osmid amenity name geometry
0 21042679 restaurant Happy Hot Pot POLYGON ((326863.790 672581.659, 326861.382 67...
1 29173148 restaurant Miller & Carter POLYGON ((318364.915 675479.475, 318362.507 67...
2 44551391 restaurant Ciao Roma POLYGON ((326531.916 673348.014, 326529.509 67...
3 50503871 restaurant Urban Angel POLYGON ((325818.638 674176.293, 325816.231 67...
4 50503874 restaurant La Garrigue POLYGON ((326593.951 673765.317, 326591.544 67...
within_buffer = two_bed_homes.sjoin(pois_buffered)
counts = within_buffer.groupby("id")["osmid"].count()
counts.head()
id
51505     110
97472       9
118314      2
163870     29
216245     47
Name: osmid, dtype: int64
airbnb_w_count = two_bed_homes.merge(counts, on="id")
airbnb_w_count.head()
id bedrooms property_type price geometry price_float osmid
0 821573 2.0 Entire rental unit $172.00 POINT (326748.646 674001.683) 172.0 7
1 834777 2.0 Entire rental unit $264.00 POINT (324950.724 673875.598) 264.0 120
2 450745 2.0 Entire rental unit $177.00 POINT (326493.725 672853.904) 177.0 47
3 485856 2.0 Entire rental unit $157.00 POINT (326597.124 673869.551) 157.0 17
4 51505 2.0 Entire rental unit $155.00 POINT (325393.807 674177.409) 155.0 110
airbnb_w_count.explore("osmid", tiles="CartoDB Positron")
/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/explore.py:400: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  elif pd.api.types.is_categorical_dtype(gdf[column]):
Make this Notebook Trusted to load map: File -> Trust Notebook

Extract from a raster

population = rioxarray.open_rasterio("data/edinburgh_population.tif", masked=True).squeeze()
population
<xarray.DataArray (y: 221, x: 236)>
[52156 values with dtype=float64]
Coordinates:
    band         int64 1
  * x            (x) float64 3.096e+05 3.097e+05 ... 3.329e+05 3.33e+05
  * y            (y) float64 6.809e+05 6.808e+05 ... 6.591e+05 6.59e+05
    spatial_ref  int64 0
Attributes:
    AREA_OR_POINT:  Area
    scale_factor:   1.0
    add_offset:     0.0
population.plot()
<matplotlib.collections.QuadMesh at 0x7fce22dde050>

pop_cube = population.drop_vars(["band", "spatial_ref"]).xvec.extract_points(
    two_bed_homes.geometry, "x", "y"
)
pop_cube.name = "population"
pop_cube
<xarray.DataArray 'population' (geometry: 1212)>
[1212 values with dtype=float64]
Coordinates:
  * geometry  (geometry) object POINT (326748.64563561964 674001.6832108775) ...
    index     (geometry) int64 3 5 6 10 17 19 ... 7669 7671 7672 7677 7680 7684
Indexes:
    geometry  GeometryIndex (crs=EPSG:27700)
Attributes:
    AREA_OR_POINT:  Area
    scale_factor:   1.0
    add_offset:     0.0
pop = pop_cube.xvec.to_geodataframe()
pop.head()
geometry index population
0 POINT (326748.646 674001.683) 3 2.000207
1 POINT (324950.724 673875.598) 5 15.106922
2 POINT (326493.725 672853.904) 6 176.059799
3 POINT (326597.124 673869.551) 10 15.964920
4 POINT (325393.807 674177.409) 17 17.008703
pop.explore("population", tiles="CartoDB Positron")
/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/explore.py:400: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  elif pd.api.types.is_categorical_dtype(gdf[column]):
Make this Notebook Trusted to load map: File -> Trust Notebook
two_bed_homes["population"] = pop.population

Area interpolation

simd = gpd.read_file("data/edinburgh_simd_2020.gpkg")
grid = tobler.util.h3fy(simd, resolution=8)
grid.head(2)
/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/pyproj/crs/crs.py:1293: UserWarning: You will likely lose important projection information when converting to a PROJ string from another format. See: https://proj.org/faq.html#what-is-the-best-format-for-describing-coordinate-reference-systems
  proj = self._crs.to_proj4(version=version)
geometry
hex_id
8819727587fffff POLYGON ((315204.837 665386.180, 314740.392 66...
881972675bfffff POLYGON ((315051.574 661661.196, 314587.029 66...
ax = simd.boundary.plot(linewidth=.5)
grid.boundary.plot(ax=ax, color="red", linewidth=.5)
<Axes: >

interpolated = tobler.area_weighted.area_interpolate(
    simd,
    grid,
    extensive_variables=["EmpNumDep", "IncNumDep"],
    intensive_variables=["EmpRate", "IncRate"],
)
interpolated.explore("EmpRate", tiles="CartoDB Positron")
/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/explore.py:400: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  elif pd.api.types.is_categorical_dtype(gdf[column]):
Make this Notebook Trusted to load map: File -> Trust Notebook
simd[["EmpRate", "geometry"]].explore("EmpRate", tiles="CartoDB Positron")
/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/explore.py:400: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  elif pd.api.types.is_categorical_dtype(gdf[column]):
Make this Notebook Trusted to load map: File -> Trust Notebook

Point interpolation

two_bed_homes.explore("price_float", tiles="CartoDB Positron")
/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/explore.py:400: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  elif pd.api.types.is_categorical_dtype(gdf[column]):
Make this Notebook Trusted to load map: File -> Trust Notebook
interrpolation_model = neighbors.KNeighborsRegressor(n_neighbors=10, weights="distance")
coords = two_bed_homes.get_coordinates()
coords.head()
x y
3 326748.645636 674001.683211
5 324950.723888 673875.598033
6 326493.725178 672853.903917
10 326597.123684 673869.551295
17 325393.807222 674177.408961
interrpolation_model.fit(
    coords, two_bed_homes.price_float
)
KNeighborsRegressor(n_neighbors=10, weights='distance')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
grid_coordinates = grid.centroid.get_coordinates()
grid_coordinates.head()
x y
hex_id
8819727587fffff 315127.615117 664904.741910
881972675bfffff 314974.336174 661179.542360
8819727627fffff 320461.899805 673264.202086
8819723927fffff 326416.235455 671673.366871
8819727569fffff 312570.031896 673867.901209
price_on_grid = interrpolation_model.predict(grid_coordinates)
grid["price"] = price_on_grid
grid.explore("price", tiles="CartoDB Positron")
/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/explore.py:400: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  elif pd.api.types.is_categorical_dtype(gdf[column]):
Make this Notebook Trusted to load map: File -> Trust Notebook
Tip

https://docs.scipy.org/doc/scipy/reference/generated/scipy.interpolate.griddata.html

https://pyinterpolate.readthedocs.io/en/stable/index.html

text here

Map synthesis

Count neighbours

distance_500 = graph.Graph.build_distance_band(two_bed_homes, 500)
distance_500.cardinalities.head()
focal
3     59
5     67
6     35
10    70
17    66
Name: cardinalities, dtype: int64
two_bed_homes["within 500m"] = distance_500.cardinalities
two_bed_homes.explore("within 500m", tiles="CartoDB Positron")
/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/explore.py:400: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  elif pd.api.types.is_categorical_dtype(gdf[column]):
Make this Notebook Trusted to load map: File -> Trust Notebook

Lagged variables

distance_500_row = distance_500.transform("r")
two_bed_homes["price_lag"] = distance_500_row.lag(two_bed_homes.price_float)
two_bed_homes.explore("price_lag", tiles="CartoDB Positron")
/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/geopandas/explore.py:400: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  elif pd.api.types.is_categorical_dtype(gdf[column]):
Make this Notebook Trusted to load map: File -> Trust Notebook
distance_500_row[3].head()
neighbor
10     0.016949
150    0.016949
232    0.016949
333    0.016949
528    0.016949
Name: weight, dtype: float64
distance_500_gaussian = graph.Graph.build_distance_band(two_bed_homes, 500, binary=False)
/home/runner/micromamba/envs/sds/lib/python3.11/site-packages/libpysal/graph/base.py:656: RuntimeWarning: divide by zero encountered in power
  kernel=lambda distances, alpha: np.power(distances, alpha),
distance_500_gaussian[3].head()
neighbor
10     0.004974
150    0.002261
232    0.005490
333    0.002426
528    0.002451
Name: weight, dtype: float64
distance_500_gaussian.transform("r")[3].head()
neighbor
10     0.028768
150    0.013077
232    0.031754
333    0.014028
528    0.014177
Name: weight, dtype: float64
two_bed_homes["price_lag_w"] = distance_500_gaussian.transform("r").lag(
    two_bed_homes.price_float
)
two_bed_homes.head()
id bedrooms property_type price geometry price_float population within 500m price_lag price_lag_w
3 821573 2.0 Entire rental unit $172.00 POINT (326748.646 674001.683) 172.0 15.964920 59 185.423729 189.633253
5 834777 2.0 Entire rental unit $264.00 POINT (324950.724 673875.598) 264.0 56.425114 67 177.119403 164.360323
6 450745 2.0 Entire rental unit $177.00 POINT (326493.725 672853.904) 177.0 140.532318 35 141.171429 136.599523
10 485856 2.0 Entire rental unit $157.00 POINT (326597.124 673869.551) 157.0 299.309570 70 190.785714 197.843603
17 51505 2.0 Entire rental unit $155.00 POINT (325393.807 674177.409) 155.0 57.981670 66 206.015152 209.656652